"""
Phase 6: Referee Fixes — Real Exchange Rate
=============================================
Addresses referee comments on:
  A) Effect size rescaling (Z₁ = -1.24 seems implausible)
  B) Balanced panel pre/post-2000 split (composition vs structural break)
  C) Labor supply mechanism proxy (LFP from WDI)
  D) Country FE + Year FE robustness
  E) Long differences (alternative to cointegration)
  F) KAOPEN narrative check (interaction vs subsample)
"""

import sys
from pathlib import Path

import numpy as np
import pandas as pd
import urllib.request
import json
import time

PROJECT_DIR = Path("/mnt/c/demographics_capital_flows/rer")
ROOT_DIR = PROJECT_DIR.parent
sys.path.insert(0, str(ROOT_DIR / "multilateral" / "src"))
from model import PanelGLS

PROCESSED_DIR = PROJECT_DIR / "data" / "processed"
RAW_DIR = PROJECT_DIR / "data" / "raw"
TABLES_DIR = PROJECT_DIR / "output" / "tables"

RAW_DIR.mkdir(parents=True, exist_ok=True)
TABLES_DIR.mkdir(parents=True, exist_ok=True)


def stars(p):
    if p < 0.01: return '***'
    if p < 0.05: return '**'
    if p < 0.10: return '*'
    return ''


def run_model(df, dep_var, regressors, label, verbose=True):
    """Run PanelGLS and return results dict."""
    regressors = [r for r in regressors if r in df.columns]
    if dep_var not in df.columns:
        print(f"  [{label}] {dep_var} missing — skipping")
        return None

    sub = df.dropna(subset=[dep_var] + regressors).copy()
    if len(sub) < 50:
        print(f"  [{label}] Insufficient obs ({len(sub)}) — skipping")
        return None

    gls = PanelGLS()
    gls.fit(sub[dep_var].values, sub[regressors].values,
            sub['iso3'].values, sub['year'].values)

    if verbose:
        print(f"\n  [{label}]  N={gls.n_obs}, countries={gls.n_countries}, "
              f"R²={gls.r_squared:.4f}")

    results = {
        'label': label, 'dep_var': dep_var,
        'n_obs': gls.n_obs, 'n_countries': gls.n_countries,
        'r_squared': gls.r_squared, 'rho': gls.rho,
    }
    for i, name in enumerate(regressors):
        results[f'coef_{name}'] = gls.beta[i]
        results[f'se_{name}'] = gls.se[i]
        results[f'p_{name}'] = gls.pvalues[i]
        sig = stars(gls.pvalues[i])
        if verbose:
            print(f"    {name:<25} {results[f'coef_{name}']:>10.4f} "
                  f"({results[f'se_{name}']:.4f}) {sig}")

    return results


# ═══════════════════════════════════════════════════════════════════════════
# LOAD DATA
# ═══════════════════════════════════════════════════════════════════════════
print("=" * 70)
print("PHASE 6: Referee Fixes — Real Exchange Rate")
print("=" * 70)

df = pd.read_csv(PROCESSED_DIR / "rer_panel.csv")
df = df[df['year'] <= 2024].copy()
print(f"\nPanel loaded: {df['iso3'].nunique()} countries, {len(df):,} obs")
print(f"Years: {df['year'].min()}–{df['year'].max()}")
print(f"\nColumns ({len(df.columns)}):")
print(list(df.columns))

# Check dependent variable
print(f"\nlog_reer_combined stats:")
print(df['log_reer_combined'].describe())
print(f"\nreer_combined stats:")
print(df['reer_combined'].describe())

# Standard controls
demo_vars = ['Z_1', 'Z_2', 'Z_3']
controls_bs = ['log_gdp_pc', 'rgdp_growth', 'kaopen', 'nfa_gdp_lag']
controls_bs_trade = controls_bs + ['trade_openness']


# ═══════════════════════════════════════════════════════════════════════════
# PART A: Effect Size Rescaling
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PART A: Effect Size Rescaling")
print("=" * 70)

# Work with the estimation sample (non-missing on dep + key regressors)
est_cols = ['log_reer_combined'] + demo_vars + controls_bs
est = df.dropna(subset=est_cols).copy()
print(f"\nEstimation sample: {est['iso3'].nunique()} countries, {len(est):,} obs")

# 1. Z₁ variation decomposition
total_sd = est['Z_1'].std()
within_sd = est.groupby('iso3')['Z_1'].transform(lambda x: x - x.mean()).std()
between_sd = est.groupby('iso3')['Z_1'].mean().std()
print(f"\nZ₁ variation decomposition:")
print(f"  Total SD:          {total_sd:.4f}")
print(f"  Within-country SD: {within_sd:.4f}")
print(f"  Between-country SD:{between_sd:.4f}")

# IQR
z1_q25, z1_q75 = est['Z_1'].quantile([0.25, 0.75])
z1_iqr = z1_q75 - z1_q25
print(f"  IQR:               {z1_iqr:.4f} (Q25={z1_q25:.4f}, Q75={z1_q75:.4f})")

# 2. Baseline coefficient
coef_z1 = -1.2411  # from baseline A2
print(f"\nBaseline Z₁ coefficient: {coef_z1:.4f}")

# Check units: is log_reer_combined in natural log or log base 10?
# If reer_combined ≈ 100 and log_reer_combined ≈ 4.6, it's natural log
sample_reer = est[['reer_combined', 'log_reer_combined']].dropna().head(5)
print(f"\nSample REER vs log(REER):")
print(sample_reer.to_string())
print(f"  exp(log_reer_combined) ≈ reer_combined? "
      f"{np.exp(est['log_reer_combined'].mean()):.1f} vs {est['reer_combined'].mean():.1f}")

# Effect sizes
within_effect = coef_z1 * within_sd
iqr_effect = coef_z1 * z1_iqr
print(f"\nEffect sizes (in log points):")
print(f"  1 within-SD of Z₁:  {within_effect:.4f} log pts → "
      f"{(np.exp(within_effect) - 1)*100:.1f}% REER change")
print(f"  IQR of Z₁:          {iqr_effect:.4f} log pts → "
      f"{(np.exp(iqr_effect) - 1)*100:.1f}% REER change")

# 3. Korea example
kor = df[df['iso3'] == 'KOR'].copy()
kor_1990 = kor.loc[kor['year'] == 1990, 'Z_1'].values
kor_2020 = kor.loc[kor['year'] == 2020, 'Z_1'].values
if len(kor_1990) > 0 and len(kor_2020) > 0:
    dz1_kor = kor_2020[0] - kor_1990[0]
    kor_effect = coef_z1 * dz1_kor
    print(f"\n  Korea: Z₁ in 1990 = {kor_1990[0]:.4f}, Z₁ in 2020 = {kor_2020[0]:.4f}")
    print(f"  ΔZ₁ = {dz1_kor:.4f}")
    print(f"  Predicted REER change: {kor_effect:.4f} log pts → "
          f"{(np.exp(kor_effect) - 1)*100:.1f}%")
else:
    print("  Korea data not available for 1990 or 2020")
    dz1_kor = np.nan
    kor_effect = np.nan

# Also compute for Japan and Germany
for cty, cname in [('JPN', 'Japan'), ('DEU', 'Germany')]:
    c = df[df['iso3'] == cty]
    c90 = c.loc[c['year'] == 1990, 'Z_1'].values
    c20 = c.loc[c['year'] == 2020, 'Z_1'].values
    if len(c90) > 0 and len(c20) > 0:
        dz = c20[0] - c90[0]
        eff = coef_z1 * dz
        print(f"  {cname}: ΔZ₁ = {dz:.4f} → {(np.exp(eff)-1)*100:.1f}% REER change")

# Save Part A table
md_a = ["# Effect Size Rescaling\n"]
md_a.append("## Z₁ Variation Decomposition\n")
md_a.append("| Metric | Value |")
md_a.append("|---|---|")
md_a.append(f"| Total SD | {total_sd:.4f} |")
md_a.append(f"| Within-country SD | {within_sd:.4f} |")
md_a.append(f"| Between-country SD | {between_sd:.4f} |")
md_a.append(f"| IQR (Q75 − Q25) | {z1_iqr:.4f} |")
md_a.append(f"| Q25 | {z1_q25:.4f} |")
md_a.append(f"| Q75 | {z1_q75:.4f} |")

md_a.append("\n## Rescaled Effect Sizes\n")
md_a.append("| Variation | ΔZ₁ | Δlog(REER) | % REER Change |")
md_a.append("|---|---|---|---|")
md_a.append(f"| 1 unit (referee's scenario) | 1.000 | {coef_z1:.4f} | "
            f"{(np.exp(coef_z1)-1)*100:.1f}% |")
md_a.append(f"| 1 within-country SD | {within_sd:.4f} | {within_effect:.4f} | "
            f"{(np.exp(within_effect)-1)*100:.1f}% |")
md_a.append(f"| IQR | {z1_iqr:.4f} | {iqr_effect:.4f} | "
            f"{(np.exp(iqr_effect)-1)*100:.1f}% |")
if not np.isnan(dz1_kor):
    md_a.append(f"| Korea 1990→2020 | {dz1_kor:.4f} | {kor_effect:.4f} | "
                f"{(np.exp(kor_effect)-1)*100:.1f}% |")

md_a.append(f"\n*Baseline Z₁ coefficient = {coef_z1:.4f} on log(REER). "
            f"Dep var is natural log of REER index (base ≈ 100). "
            f"A 1-unit change in Z₁ is far outside the within-country range "
            f"(within SD = {within_sd:.4f}).*")

(TABLES_DIR / "referee_effect_scaling.md").write_text('\n'.join(md_a))
print(f"\n  Saved: {TABLES_DIR / 'referee_effect_scaling.md'}")


# ═══════════════════════════════════════════════════════════════════════════
# PART B: Balanced Panel Pre/Post-2000 Split
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PART B: Balanced Panel Pre/Post-2000 Split")
print("=" * 70)

reg_vars = demo_vars + controls_bs
sub = df.dropna(subset=['log_reer_combined'] + reg_vars).copy()

# Countries in both periods
pre = sub[sub['year'] <= 2000]
post = sub[sub['year'] >= 2001]
countries_pre = set(pre['iso3'].unique())
countries_post = set(post['iso3'].unique())
balanced_countries = sorted(countries_pre & countries_post)
print(f"\n  Countries in pre-2000: {len(countries_pre)}")
print(f"  Countries in post-2000: {len(countries_post)}")
print(f"  Countries in BOTH (balanced): {len(balanced_countries)}")

bal = sub[sub['iso3'].isin(balanced_countries)].copy()
bal_pre = bal[bal['year'] <= 2000]
bal_post = bal[bal['year'] >= 2001]

print(f"\n  Balanced pre-2000: {len(bal_pre)} obs, {bal_pre['iso3'].nunique()} countries")
print(f"  Balanced post-2001: {len(bal_post)} obs, {bal_post['iso3'].nunique()} countries")

b_results = []

# B1: Full unbalanced pre-2000 (for comparison — same as paper)
r = run_model(pre, 'log_reer_combined', reg_vars, "B1: Unbalanced pre-2000")
if r: b_results.append(r)

# B2: Full unbalanced post-2001
r = run_model(post, 'log_reer_combined', reg_vars, "B2: Unbalanced post-2001")
if r: b_results.append(r)

# B3: Balanced pre-2000
r = run_model(bal_pre, 'log_reer_combined', reg_vars, "B3: Balanced pre-2000")
if r: b_results.append(r)

# B4: Balanced post-2001
r = run_model(bal_post, 'log_reer_combined', reg_vars, "B4: Balanced post-2001")
if r: b_results.append(r)

# Save Part B table
md_b = ["# Balanced Panel Pre/Post-2000 Split\n"]
md_b.append(f"Balanced subset: {len(balanced_countries)} countries present in both periods.\n")
md_b.append("| Model | N | Countries | R² | Z₁ coef | Z₁ SE | Z₁ p | Sig |")
md_b.append("|---|---|---|---|---|---|---|---|")
for r in b_results:
    z1c = r.get('coef_Z_1', np.nan)
    z1s = r.get('se_Z_1', np.nan)
    z1p = r.get('p_Z_1', np.nan)
    md_b.append(f"| {r['label']} | {r['n_obs']:,} | {r['n_countries']} | "
                f"{r['r_squared']:.3f} | {z1c:.4f} | {z1s:.4f} | {z1p:.4f} | {stars(z1p)} |")

md_b.append("\n## Full Coefficient Table\n")
md_b.append("| Model | Variable | Coef | SE | p-value | Sig |")
md_b.append("|---|---|---|---|---|---|")
for r in b_results:
    for var in demo_vars + controls_bs:
        ckey = f'coef_{var}'
        if ckey in r:
            p = r[f'p_{var}']
            md_b.append(f"| {r['label']} | {var} | {r[ckey]:.4f} | "
                        f"{r[f'se_{var}']:.4f} | {p:.4f} | {stars(p)} |")

md_b.append("\n*Controls: log_gdp_pc, rgdp_growth, kaopen, nfa_gdp_lag*")

(TABLES_DIR / "referee_balanced_panel.md").write_text('\n'.join(md_b))
print(f"\n  Saved: {TABLES_DIR / 'referee_balanced_panel.md'}")


# ═══════════════════════════════════════════════════════════════════════════
# PART C: Labor Supply Mechanism Proxy
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PART C: Labor Supply Mechanism Proxy")
print("=" * 70)

# Download LFP from WDI
lfp_cache = RAW_DIR / "wdi_lfp.csv"
if lfp_cache.exists():
    print("  Loading cached LFP data...")
    lfp_df = pd.read_csv(lfp_cache)
else:
    print("  Downloading LFP data from World Bank...")
    all_lfp = []
    page = 1
    while True:
        url = (f'https://api.worldbank.org/v2/country/all/indicator/SL.TLF.CACT.ZS'
               f'?date=1990:2024&format=json&per_page=10000&page={page}')
        try:
            req = urllib.request.Request(url)
            with urllib.request.urlopen(req, timeout=30) as resp:
                raw = json.loads(resp.read().decode())
            if len(raw) < 2 or raw[1] is None:
                break
            for item in raw[1]:
                if item['value'] is not None:
                    all_lfp.append({
                        'iso3': item['countryiso3code'],
                        'year': int(item['date']),
                        'lfp_rate': float(item['value'])
                    })
            total_pages = raw[0].get('pages', 1)
            if page >= total_pages:
                break
            page += 1
            time.sleep(0.5)
        except Exception as e:
            print(f"  WDI download failed: {e}")
            break

    if all_lfp:
        lfp_df = pd.DataFrame(all_lfp)
        lfp_df = lfp_df[lfp_df['iso3'] != ''].copy()
        lfp_df.to_csv(lfp_cache, index=False)
        print(f"  Saved {len(lfp_df)} LFP observations to {lfp_cache}")
    else:
        lfp_df = pd.DataFrame(columns=['iso3', 'year', 'lfp_rate'])
        print("  No LFP data downloaded")

# Merge LFP into panel
if len(lfp_df) > 0:
    df = df.merge(lfp_df[['iso3', 'year', 'lfp_rate']], on=['iso3', 'year'], how='left')
    print(f"  LFP coverage: {df['lfp_rate'].notna().sum()} obs, "
          f"{df.loc[df['lfp_rate'].notna(), 'iso3'].nunique()} countries")

c_results = []

# C1: Z → LFP (does aging predict lower labor force participation?)
if 'lfp_rate' in df.columns and df['lfp_rate'].notna().sum() > 100:
    r = run_model(df, 'lfp_rate', demo_vars + ['log_gdp_pc', 'rgdp_growth'],
                  "C1: Z → LFP")
    if r: c_results.append(r)

    # C2: LFP → log(REER)
    r = run_model(df, 'log_reer_combined', ['lfp_rate'] + controls_bs,
                  "C2: LFP → log(REER)")
    if r: c_results.append(r)

    # C3: Z → log(REER) WITHOUT LFP
    r = run_model(df, 'log_reer_combined', demo_vars + controls_bs,
                  "C3: Z → log(REER) (no LFP)")
    if r: c_results.append(r)

    # C4: Z → log(REER) WITH LFP control (attenuation test)
    r = run_model(df, 'log_reer_combined', demo_vars + controls_bs + ['lfp_rate'],
                  "C4: Z → log(REER) + LFP")
    if r: c_results.append(r)
else:
    print("  LFP data not available, skipping LFP tests")

# C5: Z → rgdp_growth (productivity proxy)
r = run_model(df, 'rgdp_growth', demo_vars + ['log_gdp_pc', 'kaopen', 'nfa_gdp_lag'],
              "C5: Z → GDP growth")
if r: c_results.append(r)

# C6: Z → log(REER) controlling for rgdp_growth (already in baseline, but explicit)
# Already have rgdp_growth in controls, so compare with/without
r = run_model(df, 'log_reer_combined', demo_vars + ['log_gdp_pc', 'kaopen', 'nfa_gdp_lag'],
              "C6: Z → log(REER) (no growth)")
if r: c_results.append(r)

# C7: working_age_share attenuation (already in paper, replicate here)
if 'working_age_share' in df.columns:
    r = run_model(df, 'log_reer_combined', demo_vars + controls_bs + ['working_age_share'],
                  "C7: Z + working_age_share")
    if r: c_results.append(r)

# Save Part C table
md_c = ["# Labor Supply Mechanism Tests\n"]
md_c.append("| Model | Dep Var | N | Countries | R² |")
md_c.append("|---|---|---|---|---|")
for r in c_results:
    md_c.append(f"| {r['label']} | {r['dep_var']} | {r['n_obs']:,} | "
                f"{r['n_countries']} | {r['r_squared']:.3f} |")

md_c.append("\n## Key Coefficients\n")
md_c.append("| Model | Variable | Coef | SE | p-value | Sig |")
md_c.append("|---|---|---|---|---|---|")
key_mech_vars = demo_vars + ['lfp_rate', 'working_age_share', 'log_gdp_pc', 'rgdp_growth']
for r in c_results:
    for var in key_mech_vars:
        ckey = f'coef_{var}'
        if ckey in r:
            p = r[f'p_{var}']
            md_c.append(f"| {r['label']} | {var} | {r[ckey]:.4f} | "
                        f"{r[f'se_{var}']:.4f} | {p:.4f} | {stars(p)} |")

# Attenuation summary
if len(c_results) >= 4:
    c3_z1 = next((r for r in c_results if 'C3' in r['label']), None)
    c4_z1 = next((r for r in c_results if 'C4' in r['label']), None)
    if c3_z1 and c4_z1:
        att = ((c3_z1.get('coef_Z_1', 0) - c4_z1.get('coef_Z_1', 0))
               / c3_z1.get('coef_Z_1', 1) * 100)
        md_c.append(f"\n*Z₁ attenuation when adding LFP: "
                    f"{c3_z1.get('coef_Z_1',0):.4f} → {c4_z1.get('coef_Z_1',0):.4f} "
                    f"({att:.1f}%)*")

md_c.append("\n*Mechanism chain: Demographics → labor supply (LFP) → "
            "relative productivity → REER*")

(TABLES_DIR / "referee_labor_mechanism.md").write_text('\n'.join(md_c))
print(f"\n  Saved: {TABLES_DIR / 'referee_labor_mechanism.md'}")


# ═══════════════════════════════════════════════════════════════════════════
# PART D: Country FE + Year FE Robustness
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PART D: Country FE + Year FE Robustness")
print("=" * 70)

d_results = []
reg_vars_d = demo_vars + controls_bs

# D1: Baseline PanelGLS (no FE)
r = run_model(df, 'log_reer_combined', reg_vars_d, "D1: Baseline (no FE)")
if r: d_results.append(r)

# D2: + Year dummies
sub_d = df.dropna(subset=['log_reer_combined'] + reg_vars_d).copy()
year_dummies = pd.get_dummies(sub_d['year'], prefix='yr', drop_first=True, dtype=float)
yr_cols = list(year_dummies.columns)
sub_d = pd.concat([sub_d.reset_index(drop=True), year_dummies.reset_index(drop=True)], axis=1)

r = run_model(sub_d, 'log_reer_combined', reg_vars_d + yr_cols, "D2: + Year FE")
if r:
    # Only keep non-year-dummy coefficients for display
    r_display = {k: v for k, v in r.items()
                 if not (k.startswith('coef_yr') or k.startswith('se_yr') or k.startswith('p_yr'))}
    d_results.append(r)

# D3: Country-demeaned (within) + Year dummies
sub_w = df.dropna(subset=['log_reer_combined'] + reg_vars_d).copy()
demean_vars = ['log_reer_combined'] + reg_vars_d
for col in demean_vars:
    sub_w[col + '_demean'] = sub_w.groupby('iso3')[col].transform(lambda x: x - x.mean())

demean_regs = [v + '_demean' for v in reg_vars_d]
year_dummies_w = pd.get_dummies(sub_w['year'], prefix='yr', drop_first=True, dtype=float)
yr_cols_w = list(year_dummies_w.columns)
sub_w = pd.concat([sub_w.reset_index(drop=True), year_dummies_w.reset_index(drop=True)], axis=1)

r = run_model(sub_w, 'log_reer_combined_demean', demean_regs + yr_cols_w,
              "D3: Within + Year FE")
if r: d_results.append(r)

# Save Part D table
md_d = ["# Country FE + Year FE Robustness\n"]
md_d.append("| Model | N | Countries | R² | Z₁ coef | Z₁ SE | Z₁ p | Sig |")
md_d.append("|---|---|---|---|---|---|---|---|")
for r in d_results:
    # Handle demeaned variable names
    z1_key = 'coef_Z_1' if 'coef_Z_1' in r else 'coef_Z_1_demean'
    z1s_key = 'se_Z_1' if 'se_Z_1' in r else 'se_Z_1_demean'
    z1p_key = 'p_Z_1' if 'p_Z_1' in r else 'p_Z_1_demean'
    z1c = r.get(z1_key, np.nan)
    z1s = r.get(z1s_key, np.nan)
    z1p = r.get(z1p_key, np.nan)
    md_d.append(f"| {r['label']} | {r['n_obs']:,} | {r['n_countries']} | "
                f"{r['r_squared']:.3f} | {z1c:.4f} | {z1s:.4f} | {z1p:.4f} | {stars(z1p)} |")

md_d.append("\n## Full Coefficients (excluding year dummies)\n")
md_d.append("| Model | Variable | Coef | SE | p-value | Sig |")
md_d.append("|---|---|---|---|---|---|")
display_vars = demo_vars + controls_bs
for r in d_results:
    for var in display_vars:
        for suffix in ['', '_demean']:
            ckey = f'coef_{var}{suffix}'
            if ckey in r:
                p = r[f'p_{var}{suffix}']
                vname = var  # show clean name
                md_d.append(f"| {r['label']} | {vname} | {r[ckey]:.4f} | "
                            f"{r[f'se_{var}{suffix}']:.4f} | {p:.4f} | {stars(p)} |")

md_d.append("\n*D1: Pooled GLS (no FE). D2: Pooled GLS + year dummies. "
            "D3: Country-demeaned + year dummies (within estimator).*")

(TABLES_DIR / "referee_fe_robustness.md").write_text('\n'.join(md_d))
print(f"\n  Saved: {TABLES_DIR / 'referee_fe_robustness.md'}")


# ═══════════════════════════════════════════════════════════════════════════
# PART E: Long Differences
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PART E: Long Differences (Alternative to Cointegration)")
print("=" * 70)

e_results = []
ld_vars = ['log_reer_combined'] + demo_vars + controls_bs

for horizon, hlabel in [(10, '10yr'), (5, '5yr')]:
    ld = df.dropna(subset=ld_vars).copy()
    ld = ld.sort_values(['iso3', 'year'])

    # Compute long differences
    for var in ld_vars:
        ld[f'd{horizon}_{var}'] = ld.groupby('iso3')[var].diff(horizon)

    ld_dep = f'd{horizon}_log_reer_combined'
    ld_regs = [f'd{horizon}_{v}' for v in demo_vars + controls_bs]
    ld_clean = ld.dropna(subset=[ld_dep] + ld_regs)
    print(f"\n  {hlabel} differences: {len(ld_clean)} obs, "
          f"{ld_clean['iso3'].nunique()} countries")

    if len(ld_clean) > 50:
        gls = PanelGLS()
        gls.fit(ld_clean[ld_dep].values, ld_clean[ld_regs].values,
                ld_clean['iso3'].values, ld_clean['year'].values)

        r = {
            'label': f'E: Δ{horizon} differences',
            'dep_var': ld_dep,
            'n_obs': gls.n_obs,
            'n_countries': gls.n_countries,
            'r_squared': gls.r_squared,
            'rho': gls.rho,
        }
        print(f"  [{r['label']}]  N={gls.n_obs}, countries={gls.n_countries}, "
              f"R²={gls.r_squared:.4f}")
        orig_names = demo_vars + controls_bs
        for i, name in enumerate(orig_names):
            r[f'coef_{name}'] = gls.beta[i]
            r[f'se_{name}'] = gls.se[i]
            r[f'p_{name}'] = gls.pvalues[i]
            sig = stars(gls.pvalues[i])
            print(f"    Δ{horizon}_{name:<20} {gls.beta[i]:>10.4f} ({gls.se[i]:.4f}) {sig}")

        e_results.append(r)

# Save Part E table
md_e = ["# Long Differences (Alternative to Cointegration)\n"]
md_e.append("| Model | N | Countries | R² | ΔZ₁ coef | ΔZ₁ SE | ΔZ₁ p | Sig |")
md_e.append("|---|---|---|---|---|---|---|---|")
for r in e_results:
    z1c = r.get('coef_Z_1', np.nan)
    z1s = r.get('se_Z_1', np.nan)
    z1p = r.get('p_Z_1', np.nan)
    md_e.append(f"| {r['label']} | {r['n_obs']:,} | {r['n_countries']} | "
                f"{r['r_squared']:.3f} | {z1c:.4f} | {z1s:.4f} | {z1p:.4f} | {stars(z1p)} |")

md_e.append("\n## Full Coefficients\n")
md_e.append("| Model | Variable | Coef | SE | p-value | Sig |")
md_e.append("|---|---|---|---|---|---|")
for r in e_results:
    h = '10' if '10' in r['label'] else '5'
    for var in demo_vars + controls_bs:
        ckey = f'coef_{var}'
        if ckey in r:
            p = r[f'p_{var}']
            md_e.append(f"| {r['label']} | Δ{h}_{var} | {r[ckey]:.4f} | "
                        f"{r[f'se_{var}']:.4f} | {p:.4f} | {stars(p)} |")

md_e.append("\n*Long differences: Δₕ y = y_t - y_{t-h}. Tests whether "
            "demographic changes over h years predict REER changes over h years.*")

(TABLES_DIR / "referee_long_differences.md").write_text('\n'.join(md_e))
print(f"\n  Saved: {TABLES_DIR / 'referee_long_differences.md'}")


# ═══════════════════════════════════════════════════════════════════════════
# PART F: KAOPEN Narrative Check
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PART F: KAOPEN Narrative Check")
print("=" * 70)

f_results = []

# F1: Z₁ × KAOPEN interaction
interact_vars = demo_vars + controls_bs + ['Z_1_x_kaopen']
r = run_model(df, 'log_reer_combined', interact_vars, "F1: Z₁ × KAOPEN interaction")
if r: f_results.append(r)

# F2-F4: Subsample splits by KAOPEN tercile
sub_f = df.dropna(subset=['log_reer_combined'] + demo_vars + controls_bs + ['kaopen']).copy()
# KAOPEN is heavily discretized — use unique sorted values for splits
kaopen_vals = np.sort(sub_f['kaopen'].unique())
print(f"\n  KAOPEN unique values: {len(kaopen_vals)}")
print(f"  KAOPEN range: [{sub_f['kaopen'].min():.3f}, {sub_f['kaopen'].max():.3f}]")
print(f"  KAOPEN median: {sub_f['kaopen'].median():.3f}")

# Use median split first, then also try bottom-quarter vs top-quarter
kaopen_med = sub_f['kaopen'].median()
kaopen_q25 = sub_f['kaopen'].quantile(0.25)
kaopen_q75 = sub_f['kaopen'].quantile(0.75)
t1, t2 = kaopen_q25, kaopen_q75
print(f"  KAOPEN Q25={t1:.3f}, median={kaopen_med:.3f}, Q75={t2:.3f}")

closed = sub_f[sub_f['kaopen'] <= t1]
mid = sub_f[(sub_f['kaopen'] > t1) & (sub_f['kaopen'] <= t2)]
open_ = sub_f[sub_f['kaopen'] > t2]

# If open is empty due to discrete values, use strictly less than max
if len(open_) == 0:
    # Use below-median vs above-median
    closed = sub_f[sub_f['kaopen'] < kaopen_med]
    open_ = sub_f[sub_f['kaopen'] >= kaopen_med]
    mid = pd.DataFrame()  # skip mid
    print(f"  Falling back to median split (KAOPEN too discrete for terciles)")

print(f"  Closed: {len(closed)} obs, {closed['iso3'].nunique()} countries")
if len(mid) > 0:
    print(f"  Mid: {len(mid)} obs, {mid['iso3'].nunique()} countries")
print(f"  Open: {len(open_)} obs, {open_['iso3'].nunique()} countries")

# Use controls without kaopen (since we're splitting on it)
controls_no_kaopen = ['log_gdp_pc', 'rgdp_growth', 'nfa_gdp_lag']

r = run_model(closed, 'log_reer_combined', demo_vars + controls_no_kaopen,
              "F2: Closed economies")
if r: f_results.append(r)

if len(mid) > 50:
    r = run_model(mid, 'log_reer_combined', demo_vars + controls_no_kaopen,
                  "F3: Mid openness")
    if r: f_results.append(r)

r = run_model(open_, 'log_reer_combined', demo_vars + controls_no_kaopen,
              "F4: Open economies")
if r: f_results.append(r)

# Save Part F table
md_f = ["# KAOPEN Narrative Check\n"]
md_f.append("| Model | N | Countries | R² | Z₁ coef | Z₁ SE | Z₁ p | Sig |")
md_f.append("|---|---|---|---|---|---|---|---|")
for r in f_results:
    z1c = r.get('coef_Z_1', np.nan)
    z1s = r.get('se_Z_1', np.nan)
    z1p = r.get('p_Z_1', np.nan)
    md_f.append(f"| {r['label']} | {r['n_obs']:,} | {r['n_countries']} | "
                f"{r['r_squared']:.3f} | {z1c:.4f} | {z1s:.4f} | {z1p:.4f} | {stars(z1p)} |")

md_f.append("\n## Full Coefficients\n")
md_f.append("| Model | Variable | Coef | SE | p-value | Sig |")
md_f.append("|---|---|---|---|---|---|")
key_f_vars = demo_vars + ['Z_1_x_kaopen'] + controls_bs + controls_no_kaopen
for r in f_results:
    for var in key_f_vars:
        ckey = f'coef_{var}'
        if ckey in r:
            p = r[f'p_{var}']
            md_f.append(f"| {r['label']} | {var} | {r[ckey]:.4f} | "
                        f"{r[f'se_{var}']:.4f} | {p:.4f} | {stars(p)} |")

# Interpretation
f1 = next((r for r in f_results if 'F1' in r['label']), None)
if f1 and 'coef_Z_1_x_kaopen' in f1:
    interact_coef = f1['coef_Z_1_x_kaopen']
    interact_p = f1['p_Z_1_x_kaopen']
    if interact_coef < 0:
        md_f.append(f"\n*Interaction Z₁×KAOPEN = {interact_coef:.4f} (p={interact_p:.4f}): "
                    f"negative → openness amplifies the demographic depreciation effect.*")
    else:
        md_f.append(f"\n*Interaction Z₁×KAOPEN = {interact_coef:.4f} (p={interact_p:.4f}): "
                    f"positive → openness attenuates the demographic depreciation effect.*")

md_f.append(f"\n*KAOPEN tercile cutoffs: {t1:.3f}, {t2:.3f}*")

(TABLES_DIR / "referee_kaopen_check.md").write_text('\n'.join(md_f))
print(f"\n  Saved: {TABLES_DIR / 'referee_kaopen_check.md'}")


# ═══════════════════════════════════════════════════════════════════════════
# SUMMARY
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PHASE 6 COMPLETE — All referee response tables saved")
print("=" * 70)
print(f"  A) {TABLES_DIR / 'referee_effect_scaling.md'}")
print(f"  B) {TABLES_DIR / 'referee_balanced_panel.md'}")
print(f"  C) {TABLES_DIR / 'referee_labor_mechanism.md'}")
print(f"  D) {TABLES_DIR / 'referee_fe_robustness.md'}")
print(f"  E) {TABLES_DIR / 'referee_long_differences.md'}")
print(f"  F) {TABLES_DIR / 'referee_kaopen_check.md'}")
